import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import pearsonr, stats, linregress
# Load the Uzbek water-quality dataset and keep only the measured parameters.
data = pd.read_csv("data_Q.csv")
data.columns = ['Location', 'year', 'month', 'pH', 'oxid', 'NO3', 'hard',
                'dry', 'Cl', 'SO4', 'Fe', 'F', 'Cu']
df_Q = pd.DataFrame(data)
# Parameter columns only (drop location/date metadata).
dp_Q = df_Q[['pH', 'oxid', 'NO3', 'hard', 'dry', 'Cl', 'SO4', 'Fe', 'F', 'Cu']]
dp_Q.head(10)
# NOTE(review): rows 0-5 appear to hold standards/weights/bounds; actual
# measurements start at row 6 — confirm against data_Q.csv.
dp = dp_Q.iloc[6:]
# Pairwise linear regressions between all parameter columns, each rendered
# as a seaborn regression plot annotated with its R^2.
columns = dp.columns
for i, x_col in enumerate(columns):
    for y_col in columns[i + 1:]:
        # Fit a straight line to this (x, y) pair.
        slope, intercept, r_value, p_value, std_err = linregress(dp[x_col], dp[y_col])
        r_squared = r_value ** 2
        plt.figure(figsize=(6, 4))
        sns.regplot(x=x_col, y=y_col, data=dp, scatter_kws={'s': 50}, line_kws={"color": "red"})
        plt.title(f'Regression Plot of {x_col} vs {y_col}')
        plt.xlabel(x_col)
        plt.ylabel(y_col)
        # Show the goodness of fit in axes coordinates.
        plt.text(0.05, 0.9, f'$R^2 = {r_squared:.3f}$', transform=plt.gca().transAxes, fontsize=12, color='blue')
        plt.show()
# Correlation matrix of the measured parameters, rendered as a heatmap.
corr_matrix = dp.corr()
plt.figure(figsize=(8, 6))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', vmin=-1, vmax=1)
plt.title("Correlation Matrix of Water Quality Parameters")
plt.show()
# Pairwise Pearson-correlation p-values; the diagonal is left as NaN because
# a variable's correlation with itself has no meaningful p-value.
p_values = pd.DataFrame(np.zeros((dp.shape[1], dp.shape[1])), columns=dp.columns, index=dp.columns)
for col1 in dp.columns:
    for col2 in dp.columns:
        if col1 == col2:
            p_values.loc[col1, col2] = np.nan
        else:
            _, p_val = pearsonr(dp[col1], dp[col2])
            p_values.loc[col1, col2] = p_val
plt.figure(figsize=(8, 6))
sns.heatmap(p_values, annot=True, cmap='coolwarm', vmin=0, vmax=0.1)
plt.title("P-value Matrix of Water Quality Parameters (Significance Level)")
plt.show()
## standard values
# Header rows of the sheet used by the WQI formulas below.
dp = dp_Q
st_up = dp.iloc[0]   # upper standard limit per parameter
st_low = dp.iloc[1]  # lower standard limit per parameter
x_up = dp.iloc[4]    # sub-index scale upper bound
x_low = dp.iloc[5]   # sub-index scale lower bound
## Results
def result(x):
    """Map a numeric WQI score to its quality category label."""
    if x < 30:
        return 'Poor'
    if x < 50:
        return 'Marginal'
    if x < 80:
        return 'Fair'
    return 'Good'
## Sub index calculate
# pH=0, oxid=1, NO3=3, hard=3, dry=4, Cl=5, SO4=6, Fe=7, F=8, Cu=9
def SI1(x, x_up, x_low):
    """Sub-index that rewards values below the upper bound (floored at 0)."""
    return max(0, (x_up - x) / (x_up - x_low) * 100)


def SI2(x, x_up, x_low):
    """Sub-index that rewards values above the lower bound (floored at 0)."""
    return max(0, (x - x_low) / (x_up - x_low) * 100)


def SI(i, x, x_up, x_low):
    """Sub-index dispatch for parameter i; i == 0 (pH) is scored two-sided."""
    if i in (1, 2, 3, 4, 5, 6, 7, 8, 9):
        return SI1(x, x_up, x_low)
    # pH: full score inside the 7.5-8.5 band, penalised on either side.
    if x < 7.5:
        return SI2(x, x_up, x_low)
    if x > 8.5:
        return SI1(x, x_up, x_low)
    return 100
def NSF(x):
    """NSF water-quality index: weighted sum of per-parameter sub-indices.

    Uses the module-level x_up/x_low scale bounds and the weight row
    dp.iloc[2]. Returns a (score, category) pair.
    """
    subindices = [SI(i, x.iloc[i], x_up.iloc[i], x_low.iloc[i])
                  for i in range(len(x))]
    weights = dp.iloc[2]
    score = round(np.dot(subindices, weights))
    return score, result(score)


# Sanity check on the first measurement row.
x = dp.iloc[6]
NSF(x)
def CCME(x):
    """CCME WQI for one sample row against the module-level st_up limits.

    F1 = % of parameters exceeding their upper limit; F2 is taken equal to
    F1 (single-sample scope); F3 is the normalised magnitude of excursions.
    Returns a (score, category) pair.

    Bug fix: the original wrote `Failed_p=+1`, which ASSIGNED +1 instead of
    incrementing, so F1/F2 could never count more than one failed parameter.
    """
    Total_p = len(x)
    Failed_p = 0
    for i in range(Total_p):
        if x.iloc[i] > st_up.iloc[i]:
            Failed_p += 1  # was `Failed_p=+1` (assignment, not increment)
    F1 = Failed_p / Total_p * 100
    F2 = F1
    # Sum of relative excursions beyond the limits.
    excursion = 0
    for i in range(Total_p):
        if x.iloc[i] > st_up.iloc[i]:
            excursion += x.iloc[i] / st_up.iloc[i] - 1
    nse = excursion / Total_p
    F3 = nse / (1 + nse) * 100
    # 1.732 ~ sqrt(3): normalises the 3-factor vector length back to 0-100.
    CCME = 100 - np.sqrt(F1 * F1 + F2 * F2 + F3 * F3) / 1.732
    return round(CCME), result(CCME)
# Sanity check CCME on the first measurement row.
x = dp.iloc[6]
CCME(x)


def WA(x):
    """Weighted-arithmetic WQI: weights inversely proportional to st_up.

    Sub-indices are demerits (100 - SI) against the regulatory limits.
    Returns a (score, category) pair.
    """
    inverse_limits = [1 / s for s in st_up]
    total = np.sum(inverse_limits)
    w = [v / total for v in inverse_limits]
    demerits = [100 - SI(i, x.iloc[i], st_up.iloc[i], st_low.iloc[i])
                for i in range(len(x))]
    score = max(0, 100 - np.dot(demerits, w))
    return round(score), result(score)


x = dp.iloc[6]
WA(x)
## Sub index calculate
# pH=0, oxid=1, NO3=3, hard=3, dry=4, Cl=5, SO4=6, Fe=7, F=8, Cu=9
def QI1(x, x_up, x_low):
    """Quality rating on the [x_low, x_up] scale, capped at 100."""
    return min(100, (x - x_low) / (x_up - x_low) * 100)


def QI(i, x, x_up, x_low):
    """Quality rating for parameter i; pH (i == 0) is rated by its absolute
    distance from the mid-point of the standard band."""
    if i in (1, 2, 3, 4, 5, 6, 7, 8, 9):
        return QI1(x, x_up, x_low)
    return abs(QI1(x, x_up, (x_up + x_low) / 2))
# Calculating DWA WQI
def DWA(x):
    """Dynamic-weight WQI: weights are exp(rating/100), normalised to sum 1.

    Ratings are QI values against the st_up/st_low standards.
    Returns a (score, category) pair.
    """
    ratings = []
    raw_weights = []
    for i in range(len(x)):
        qi = QI(i, x.iloc[i], st_up.iloc[i], st_low.iloc[i])
        ratings.append(qi)
        raw_weights.append(np.exp(qi / 100))
    norm = np.sum(raw_weights)
    weights = [v / norm for v in raw_weights]
    score = 100 - max(0, np.dot(ratings, weights))
    return round(score), result(score)


x = dp.iloc[6]
DWA(x)
# Score every measurement row with all four indices, collecting both the
# numeric scores (t_*) and the category labels (d_*), then persist to CSV.
t_NSF, t_CCME, t_WA, t_DWA = [], [], [], []
d_NSF, d_CCME, d_WA, d_DWA = [], [], [], []
for i in range(len(dp.iloc[6:])):
    x = dp.iloc[i + 6]
    for scores, labels, model in ((t_NSF, d_NSF, NSF), (t_CCME, d_CCME, CCME),
                                  (t_WA, d_WA, WA), (t_DWA, d_DWA, DWA)):
        value, label = model(x)
        scores.append(value)
        labels.append(label)
df = dp.iloc[6:]
df = df.assign(NSF=d_NSF)
df['CCME'] = d_CCME
df['WA'] = d_WA
df['DWA'] = d_DWA
df['NSFn'] = t_NSF
df['CCMEn'] = t_CCME
df['WAn'] = t_WA
df['DWAn'] = t_DWA
df.head(5)
df.to_csv('data_Eclipsing_uzb.csv', index=False)
# Generate synthetic "Good" samples: normal noise around the mean row with
# sigma = 20% of the headroom to the upper standard, keeping only rows where
# every parameter stays within one sigma of the mean.
mean_values = dp_Q.iloc[3]
# NOTE(review): std_up is assigned but never used; the next line uses st_up
# (same row of the sheet, set earlier) — confirm which was intended.
std_up = dp_Q.iloc[0]
std_values = 0.2 * (st_up - mean_values)
num_samples = 10000  # Adjust the number of samples as needed
new_data = np.random.normal(mean_values, std_values, (num_samples, len(mean_values)))
good_df = pd.DataFrame(new_data, columns=dp_Q.columns)
good_df = good_df.abs()
for i, row in good_df.iterrows():
    in_band = True
    for j in range(len(row)):
        lo = mean_values.iloc[j] - std_values.iloc[j]
        hi = mean_values.iloc[j] + std_values.iloc[j]
        if not (lo <= row.iloc[j] <= hi):
            in_band = False
            break
    if not in_band:
        good_df = good_df.drop(i, axis=0)
print(len(good_df))
good_df.head()
Biz quyidagi tasdiq uchun modelni tekshiramiz.
Agar hamma parametrlar standart qiymatiga nisbatan 20% dan ko'p bo'lmagan chetlanishga ega bo'lsa, bu suv yaxshi "Good".
# Count how often each model labels the synthetic "Good" samples as Good.
t_NSF = t_CCME = t_WA = t_DWA = 0
for i in range(len(good_df)):
    x = good_df.iloc[i]
    if NSF(x)[1] == 'Good': t_NSF += 1
    if CCME(x)[1] == 'Good': t_CCME += 1
    if WA(x)[1] == 'Good': t_WA += 1
    if DWA(x)[1] == 'Good': t_DWA += 1
tg = {'NSF': t_NSF, 'CCME': t_CCME, 'WA': t_WA, 'DWA': t_DWA}
print(tg)
# Bar chart of per-model hit counts.
labels = list(tg.keys())
values = list(tg.values())
plt.figure(figsize=(5, 3))
plt.bar(labels, values, color='blue')
plt.xlabel('Categories')
plt.ylabel('Values')
plt.title('Bar Chart of Data')
plt.xticks(rotation=45)
plt.show()

from scipy.stats import binomtest

# One-sided binomial test per model of H0: success rate >= 95%.
H0 = 0.95  # null hypothesis
alpha = 0.05  # Significance level
L = len(good_df)
print(L)
print('p-values')
for model_name in tg:
    p_values = binomtest(tg[model_name], L, p=H0, alternative='less').pvalue
    if p_values < alpha:
        text = 'H0 qabul qilinmasligiga statistik asos bor'
    else:
        text = 'H0 qabul qilinishiga statistik asos bor'
    print(model_name, '----', f"{p_values:.3f}", '----', text)
3 tadan ko'p parametri normadan oshgan suvni yomon 'poor' deymiz.
# Generate synthetic "Poor" samples: wide noise (sigma = 2x the headroom to
# the upper standard), keeping only rows with at least two out-of-band values.
mean_values = dp_Q.iloc[3]
std_up = dp_Q.iloc[0]
std_values = 2 * (st_up - mean_values)
num_samples = 1000  # Adjust the number of samples as needed
new_data = np.random.normal(mean_values, std_values, (num_samples, len(mean_values)))
poor_df = pd.DataFrame(new_data, columns=dp_Q.columns)
poor_df = poor_df.abs()
for i, row in poor_df.iterrows():
    violations = 0
    for j in range(len(row)):
        lo = mean_values.iloc[j] - std_values.iloc[j]
        hi = mean_values.iloc[j] + std_values.iloc[j]
        if not (lo <= row.iloc[j] <= hi):
            violations += 1
    # NOTE(review): the markdown above says "more than 3" parameters, but the
    # code keeps rows with >= 2 violations — confirm the intended threshold.
    if violations < 2:
        poor_df = poor_df.drop(i, axis=0)
print(len(poor_df))
poor_df.head()
# Count how often each model labels the synthetic "Poor" samples as Poor.
t_NSF = t_CCME = t_WA = t_DWA = 0
for i in range(len(poor_df)):
    x = poor_df.iloc[i]
    if NSF(x)[1] == 'Poor': t_NSF += 1
    if CCME(x)[1] == 'Poor': t_CCME += 1
    if WA(x)[1] == 'Poor': t_WA += 1
    if DWA(x)[1] == 'Poor': t_DWA += 1
tp = {'NSF': t_NSF, 'CCME': t_CCME, 'WA': t_WA, 'DWA': t_DWA}
print(tp)
# Bar chart of per-model hit counts.
labels = list(tp.keys())
values = list(tp.values())
plt.figure(figsize=(5, 3))
plt.bar(labels, values, color='blue')
plt.xlabel('Categories')
plt.ylabel('Values')
plt.title('Bar Chart of Data')
plt.xticks(rotation=45)
plt.show()

# One-sided binomial test per model of H0: success rate >= 95%.
H0 = 0.95  # null hypothesis
alpha = 0.05  # Significance level
L = len(poor_df)
print(L)
print('p-values')
for model_name in tp:
    p_values = binomtest(tp[model_name], L, p=H0, alternative='less').pvalue
    if p_values < alpha:
        text = 'H0 qabul qilinmasligiga statistik asos bor'
    else:
        text = 'H0 qabul qilinishiga statistik asos bor'
    print(model_name, '----', f"{p_values:.3f}", '----', text)
Agar hamma parametrlar standart qiymatiga nisbatan 25% dan ko'p va 50% dan kam chetlanishga ega bo'lsa, bu suv durust "Fair".
# Generate synthetic "Fair" samples: very tight noise shifted +25% of the
# headroom, then keep rows whose deviation from the original mean lies in an
# annulus (outside 0.25x but inside 0.5x of the headroom).
mean_values = dp_Q.iloc[3]
std_up = dp_Q.iloc[0]
std_values = 0.001 * (st_up - mean_values)
mean_values = mean_values + 250 * std_values
num_samples = 10000  # Adjust the number of samples as needed
new_data = np.random.normal(mean_values, std_values, (num_samples, len(mean_values)))
fair_df = pd.DataFrame(new_data, columns=dp_Q.columns)
fair_df = fair_df.abs()
# Re-centre the acceptance band on the original mean, sigma = 50% headroom.
mean_values = dp_Q.iloc[3]
std_values = 0.5 * (st_up - mean_values)
for i, row in fair_df.iterrows():
    keep = True
    for j in range(len(row)):
        mu = mean_values.iloc[j]
        sd = std_values.iloc[j]
        outside_outer = not (mu - sd < row.iloc[j] < mu + sd)
        inside_inner = mu - 0.5 * sd < row.iloc[j] < mu + 0.5 * sd
        if outside_outer or inside_inner:
            keep = False
            break
    if not keep:
        fair_df = fair_df.drop(i, axis=0)
print(len(fair_df))
fair_df.head()
# Count how often each model labels the synthetic "Fair" samples as Fair.
t_NSF = t_CCME = t_WA = t_DWA = 0
for i in range(len(fair_df)):
    x = fair_df.iloc[i]
    if NSF(x)[1] == 'Fair': t_NSF += 1
    if CCME(x)[1] == 'Fair': t_CCME += 1
    if WA(x)[1] == 'Fair': t_WA += 1
    if DWA(x)[1] == 'Fair': t_DWA += 1
tf = {'NSF': t_NSF, 'CCME': t_CCME, 'WA': t_WA, 'DWA': t_DWA}
print(tf)
# Bar chart of per-model hit counts.
labels = list(tf.keys())
values = list(tf.values())
plt.figure(figsize=(5, 3))
plt.bar(labels, values, color='blue')
plt.xlabel('Categories')
plt.ylabel('Values')
plt.title('Bar Chart of Data')
plt.xticks(rotation=45)
plt.show()

# One-sided binomial test per model of H0: success rate >= 95%.
H0 = 0.95  # null hypothesis
alpha = 0.05  # Significance level
L = len(fair_df)
print(L)
print('p-values')
for model_name in tf:
    p_values = binomtest(tf[model_name], L, p=H0, alternative='less').pvalue
    if p_values < alpha:
        text = 'H0 qabul qilinmasligiga statistik asos bor'
    else:
        text = 'H0 qabul qilinishiga statistik asos bor'
    print(model_name, '----', f"{p_values:.3f}", '----', text)
Agar hamma parametrlar standart qiymatiga nisbatan 60% dan ko'p va 90% dan kam chetlanishga ega bo'lsa, bu suv yomonga yaqin "Marginal".
# Generate synthetic "Marginal" samples: noise shifted +65% of the headroom,
# then keep rows whose deviation from the original mean lies between 0.6x
# and 0.8x of the headroom.
mean_values = dp_Q.iloc[3]
std_up = dp_Q.iloc[0]
std_values = 0.05 * (st_up - mean_values)
mean_values = mean_values + 13 * std_values
num_samples = 1000  # Adjust the number of samples as needed
new_data = np.random.normal(mean_values, std_values, (num_samples, len(mean_values)))
Marginal_df = pd.DataFrame(new_data, columns=dp_Q.columns)
Marginal_df = Marginal_df.abs()
# Re-centre the acceptance annulus on the original mean, sigma = headroom.
mean_values = dp_Q.iloc[3]
std_values = (st_up - mean_values)
for i, row in Marginal_df.iterrows():
    keep = True
    for j in range(len(row)):
        mu = mean_values.iloc[j]
        sd = std_values.iloc[j]
        outside_outer = not (mu - 0.8 * sd < row.iloc[j] < mu + 0.8 * sd)
        inside_inner = mu - 0.6 * sd < row.iloc[j] < mu + 0.6 * sd
        if outside_outer or inside_inner:
            keep = False
            break
    if not keep:
        Marginal_df = Marginal_df.drop(i, axis=0)
print(len(Marginal_df))
Marginal_df.head()
# Count how often each model labels the synthetic "Marginal" samples as such.
t_NSF = t_CCME = t_WA = t_DWA = 0
for i in range(len(Marginal_df)):
    x = Marginal_df.iloc[i]
    if NSF(x)[1] == 'Marginal': t_NSF += 1
    if CCME(x)[1] == 'Marginal': t_CCME += 1
    if WA(x)[1] == 'Marginal': t_WA += 1
    if DWA(x)[1] == 'Marginal': t_DWA += 1
tm = {'NSF': t_NSF, 'CCME': t_CCME, 'WA': t_WA, 'DWA': t_DWA}
print(tm)
# Bar chart of per-model hit counts.
labels = list(tm.keys())
values = list(tm.values())
plt.figure(figsize=(5, 3))
plt.bar(labels, values, color='blue')
plt.xlabel('Categories')
plt.ylabel('Values')
plt.title('Bar Chart of Data')
plt.xticks(rotation=45)
plt.show()

# One-sided binomial test per model of H0: success rate >= 95%.
H0 = 0.95  # null hypothesis
alpha = 0.05  # Significance level
L = len(Marginal_df)
print(L)
print('p-values')
for model_name in tm:
    p_values = binomtest(tm[model_name], L, p=H0, alternative='less').pvalue
    if p_values < alpha:
        text = 'H0 qabul qilinmasligiga statistik asos bor'
    else:
        text = 'H0 qabul qilinishiga statistik asos bor'
    print(model_name, '----', f"{p_values:.3f}", '----', text)
# Pool all synthetic samples and compare each model's predicted category
# counts against the known generation ("real") counts.
mix_df = pd.concat([good_df, fair_df, Marginal_df, poor_df], ignore_index=True)
print(len(mix_df))
mix_df.head()
real_dist = {'Good': len(good_df), 'Fair': len(fair_df),
             'Marginal': len(Marginal_df), 'Poor': len(poor_df)}
dist = {}
for model_name in tm:
    dist[model_name] = {'Good': tg[model_name], 'Fair': tf[model_name],
                        'Marginal': tm[model_name], 'Poor': tp[model_name]}
categories = list(real_dist.keys())
real_values = list(real_dist.values())
for model_name in tm:
    model_values = list(dist[model_name].values())
    x = np.arange(len(categories))  # the label locations
    width = 0.35  # the width of the bars
    fig, ax = plt.subplots(figsize=(4, 3))
    # Real vs. model distribution, side by side.
    ax.bar(x - width / 2, real_values, width, label='Real Distribution', color='blue')
    ax.bar(x + width / 2, model_values, width, label=f'{model_name} Distribution', color='orange')
    ax.set_xlabel('Categories')
    ax.set_ylabel('Frequencies')
    ax.set_title('Comparison of Real Distribution and Model Distribution')
    ax.set_xticks(x)
    ax.set_xticklabels(categories)
    ax.legend()
    plt.show()

from scipy.stats import chisquare

# Chi-square goodness-of-fit of each model's counts against the real counts.
observed = np.array(list(real_dist.values()))
for model_name in tm:
    print(model_name)
    expected = np.array(list(dist[model_name].values()))
    # chisquare requires matching totals: spread the shortfall evenly.
    difference = (observed.sum() - expected.sum()) / len(observed)
    expected = expected + difference
    chi2_stat, p_value = chisquare(f_obs=observed, f_exp=expected)
    print(f"Chi-square statistic: {chi2_stat}")
    print(f"P-value: {p_value}")
    alpha = 0.05
    if p_value < alpha:
        print("There is a significant difference between the real distribution and the", model_name, " model distribution.")
    else:
        print("There is no significant difference between the real distribution and the", model_name, " model distribution.")
    print()
# Reload the scored Uzbek dataset and plot each model's category percentages.
data = pd.read_csv("data_Eclipsing_uzb.csv")
data.columns = ['pH', 'oxid', 'NO3', 'hard', 'dry', 'Cl', 'SO4', 'Fe', 'F', 'Cu',
                'NSF', 'CCME', 'WA', 'DWA', 'NSFn', 'CCMEn', 'WAn', 'DWAn']
df = pd.DataFrame(data)
df = df[['NSF', 'CCME', 'WA', 'DWA']]
categories = ['Good', 'Fair', 'Marginal', 'Poor']
freq_dict = {}
for col in df.columns:
    value_counts = df[col].value_counts().to_dict()
    freq_dict[col] = {category: value_counts.get(category, 0) for category in categories}
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(8, 6))
if len(freq_dict) > 1:
    axes = axes.flatten()
for i, (model, counts) in enumerate(freq_dict.items()):
    total = sum(counts.values())
    percentages = [(counts[cat] / total) * 100 for cat in categories]
    axes[i].bar(categories, percentages, color=['blue', 'orange', 'green', 'red'])
    axes[i].set_title(f'{model} Distribution (%)')
plt.tight_layout()
plt.show()
# Load the Irish (summer) dataset and show the parameter correlation matrix.
data = pd.read_csv("data_I.csv")  # Ireland, summer survey
data.columns = ['Location', 'CHL', 'DPX', 'MRP', 'DIN', 'AMN', 'BOD', 'pH', 'TEMP', 'TON', 'TRAN']
df_I = pd.DataFrame(data)
dp_I = df_I[['CHL', 'DPX', 'MRP', 'DIN', 'AMN', 'BOD', 'pH', 'TEMP', 'TON', 'TRAN']]
df_I.head(5)
dp = dp_I.iloc[6:]  # Irish measurement rows (header rows 0-5 skipped)
corr_matrix = dp.corr()
plt.figure(figsize=(8, 6))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', vmin=-1, vmax=1)
plt.title("Correlation Matrix of Water Quality Parameters")
plt.show()
# Pairwise Pearson p-values for the Irish parameters (diagonal left as NaN).
p_values = pd.DataFrame(np.zeros((dp_I.shape[1], dp_I.shape[1])), columns=dp_I.columns, index=dp_I.columns)
for col1 in dp.columns:
    for col2 in dp.columns:
        if col1 == col2:
            p_values.loc[col1, col2] = np.nan
        else:
            _, p_val = pearsonr(dp[col1], dp[col2])
            p_values.loc[col1, col2] = p_val
plt.figure(figsize=(8, 8))
sns.heatmap(p_values, annot=True, cmap='coolwarm', vmin=0, vmax=0.1)
plt.title("P-value Matrix of Water Quality Parameters (Significance Level)")
plt.show()
## standard values for Ireland
dp = dp_I
st_up = dp.iloc[0]   # upper standard limits
st_low = dp.iloc[1]  # lower standard limits
x_up = dp.iloc[4]    # sub-index scale upper bounds
x_low = dp.iloc[5]   # sub-index scale lower bounds
## Sub index calculate
# CHL=0,DOX=1,MRP=2,DIN=3,AMN=4,BOD=5,pH=6,TEMP=7,TON=8,TRAN=9
def SI1(x, x_up, x_low):
    """Sub-index that rewards values below the upper bound (floored at 0)."""
    return max(0, (x_up - x) / (x_up - x_low) * 100)


def SI2(x, x_up, x_low):
    """Sub-index that rewards values above the lower bound (floored at 0)."""
    return max(0, (x - x_low) / (x_up - x_low) * 100)


def SI(i, x, x_up, x_low):
    """Sub-index dispatch for the Irish parameter set (index map above)."""
    if i in (0, 2, 3, 4, 5, 8):
        return SI1(x, x_up, x_low)
    if i == 1:
        # Two-sided around 100 — presumably DO % saturation; confirm units.
        if x > 100:
            return SI2(x, x_up, x_low)
        if x < 100:
            return SI1(x, x_up, x_low)
        return 100
    if i == 6:
        # pH: full score inside the 7.5-8.5 band.
        if x < 7.5:
            return SI2(x, x_up, x_low)
        if x > 8.5:
            return SI1(x, x_up, x_low)
        return 100
    if i == 7:
        # Temperature: hard threshold at 25.
        return 100 if x < 25 else 0
    if i == 9:
        # Transparency: scored only below 1, otherwise full marks.
        return SI1(x, x_up, x_low) if x < 1 else 100
## Results
def result(x):
    """Map a numeric WQI score to its quality category label."""
    if x < 30:
        return 'Poor'
    if x < 50:
        return 'Marginal'
    if x < 80:
        return 'Fair'
    return 'Good'
def NSF(x):
    """NSF WQI for the Irish parameter set; weights come from dp.iloc[2].

    Returns a (score, category) pair.
    """
    subindices = [SI(i, x.iloc[i], x_up.iloc[i], x_low.iloc[i])
                  for i in range(len(x))]
    weights = dp.iloc[2]
    score = round(np.dot(subindices, weights))
    return score, result(score)


x = dp.iloc[6]
NSF(x)
def WQM(x):
    """Quadratic-mean WQI: each sub-index is the product of the scale-based
    and the standard-based SI, weighted by row dp.iloc[3] under a sqrt.

    Returns a (score, category) pair.
    """
    combined = []
    for i in range(len(x)):
        value = (SI(i, x.iloc[i], x_up.iloc[i], x_low.iloc[i])
                 * SI(i, x.iloc[i], st_up.iloc[i], st_low.iloc[i]))
        combined.append(value)
    weights = dp.iloc[3]
    score = round(np.sqrt(np.dot(combined, weights)))
    return score, result(score)


x = dp.iloc[6]
WQM(x)
def CCME(x):
    """CCME WQI (Irish parameter set) against the module-level st_up limits.

    F1 = % of parameters exceeding their upper limit; F2 is taken equal to
    F1 (single-sample scope); F3 is the normalised magnitude of excursions.
    Returns a (score, category) pair.

    Bug fix: the original wrote `Failed_p=+1`, which ASSIGNED +1 instead of
    incrementing, so F1/F2 could never count more than one failed parameter.
    """
    Total_p = len(x)
    Failed_p = 0
    for i in range(Total_p):
        if x.iloc[i] > st_up.iloc[i]:
            Failed_p += 1  # was `Failed_p=+1` (assignment, not increment)
    F1 = Failed_p / Total_p * 100
    F2 = F1
    # Sum of relative excursions beyond the limits.
    excursion = 0
    for i in range(Total_p):
        if x.iloc[i] > st_up.iloc[i]:
            excursion += x.iloc[i] / st_up.iloc[i] - 1
    nse = excursion / Total_p
    F3 = nse / (1 + nse) * 100
    # 1.732 ~ sqrt(3): normalises the 3-factor vector length back to 0-100.
    CCME = 100 - np.sqrt(F1 * F1 + F2 * F2 + F3 * F3) / 1.732
    return round(CCME), result(CCME)
# Sanity check CCME on the first Irish measurement row.
x = dp.iloc[6]
CCME(x)


def WA(x):
    """Weighted-arithmetic WQI (Irish set): weights inverse to st_up.

    Sub-indices are demerits (100 - SI) against the regulatory limits.
    Returns a (score, category) pair.
    """
    inverse_limits = [1 / s for s in st_up]
    total = np.sum(inverse_limits)
    w = [v / total for v in inverse_limits]
    demerits = [100 - SI(i, x.iloc[i], st_up.iloc[i], st_low.iloc[i])
                for i in range(len(x))]
    score = max(0, 100 - np.dot(demerits, w))
    return round(score), result(score)


x = dp.iloc[6]
WA(x)
## Sub index calculate
# CHL=0,DOX=1,MRP=2,DIN=3,AMN=4,BOD=5,pH=6,TEMP=7,TON=8,TRAN=9
def QI1(x, x_up, x_low):
    """Quality rating on the [x_low, x_up] scale, capped at 100."""
    return min(100, (x - x_low) / (x_up - x_low) * 100)


def QI(i, x, x_up, x_low):
    """Quality rating dispatch for the Irish parameter set."""
    if i in (0, 2, 3, 4, 5, 8):
        return QI1(x, x_up, x_low)
    if i == 1 or i == 6:
        # DO and pH: absolute distance from the mid-point of the band.
        return abs(QI1(x, x_up, (x_up + x_low) / 2))
    if i == 7:
        # Temperature: hard threshold at 25.
        return 0 if x < 25 else 100
    if i == 9:
        # Transparency: linear penalty below 1, none above.
        return (1 - x) * 100 if x < 1 else 0
def DWA(x):
    """Dynamic-weight WQI (Irish set): weights exp(rating/100) - 1,
    normalised to sum 1. Returns a (score, category) pair."""
    ratings = []
    raw_weights = []
    for i in range(len(x)):
        qi = QI(i, x.iloc[i], st_up.iloc[i], st_low.iloc[i])
        ratings.append(qi)
        raw_weights.append(np.exp(qi / 100) - 1)
    norm = np.sum(raw_weights)
    weights = [v / norm for v in raw_weights]
    score = 100 - max(0, np.dot(ratings, weights))
    return round(score), result(score)


x = dp.iloc[34]
DWA(x)
# Score every Irish measurement row with all five indices, collecting both
# numeric scores (t_*) and category labels (d_*).
t_NSF, t_WQM, t_CCME, t_WA, t_DWA = [], [], [], [], []
d_NSF, d_WQM, d_CCME, d_WA, d_DWA = [], [], [], [], []
for i in range(len(dp.iloc[6:])):
    x = dp.iloc[i + 6]
    for scores, labels, model in ((t_NSF, d_NSF, NSF), (t_WQM, d_WQM, WQM),
                                  (t_CCME, d_CCME, CCME), (t_WA, d_WA, WA),
                                  (t_DWA, d_DWA, DWA)):
        value, label = model(x)
        scores.append(value)
        labels.append(label)
df = dp.iloc[6:]
df = df.assign(NSF=d_NSF)
df['WQM'] = d_WQM
df['CCME'] = d_CCME
df['WA'] = d_WA
df['DWA'] = d_DWA
df['NSFn'] = t_NSF
df['WQMn'] = t_WQM
df['CCMEn'] = t_CCME
df['WAn'] = t_WA
df['DWAn'] = t_DWA
df.head(5)
Gipoteza tekshirish uchun kriteriy yozamiz.
Shu tasdiqlarni qaysi model ko'proq qanoatlantirishini tekshiramiz.
def xato_bor(x, upper=None, lower=None):
    """Return True if any parameter of x violates its standard band.

    ("xato bor" = "there is a violation".) `upper`/`lower` default to the
    module-level st_up/st_low standards; passing them explicitly generalizes
    the check to arbitrary bounds without changing existing call sites.
    """
    if upper is None:
        upper = st_up
    if lower is None:
        lower = st_low
    for i in range(len(x)):
        if x.iloc[i] > upper.iloc[i] or x.iloc[i] < lower.iloc[i]:
            return True
    return False
# Per model, count samples whose label contradicts the standards check:
# a violating row rated 'Good', or a clean row rated 'Poor'.
t_NSF = t_WQM = t_CCME = t_WA = t_DWA = 0
for i in range(len(dp.iloc[6:])):
    x = dp.iloc[i + 6]
    if xato_bor(x):
        if NSF(x)[1] == 'Good': t_NSF += 1
        if WQM(x)[1] == 'Good': t_WQM += 1
        if CCME(x)[1] == 'Good': t_CCME += 1
        if WA(x)[1] == 'Good': t_WA += 1
        if DWA(x)[1] == 'Good': t_DWA += 1
    else:
        if NSF(x)[1] == 'Poor': t_NSF += 1
        if WQM(x)[1] == 'Poor': t_WQM += 1
        if CCME(x)[1] == 'Poor': t_CCME += 1
        if WA(x)[1] == 'Poor': t_WA += 1
        if DWA(x)[1] == 'Poor': t_DWA += 1
t = {'NSF': t_NSF, 'WQM': t_WQM, 'CCME': t_CCME, 'WA': t_WA, 'DWA': t_DWA}
print(t)
# Bar chart of per-model counts.
labels = list(t.keys())
values = list(t.values())
plt.figure(figsize=(5, 3))
plt.bar(labels, values, color='blue')
plt.xlabel('Categories')
plt.ylabel('Values')
plt.title('Bar Chart of Data')
plt.xticks(rotation=45)
plt.show()
# Fit per-parameter normal distributions to the Irish measurements, report
# them, and draw synthetic samples from the fitted distributions.
df1 = dp.iloc[6:]
mean_values = df1.mean()
std_values = df1.std()
results_df = pd.DataFrame({
    'Mean': mean_values,
    'Standard Deviation': std_values
})
print(results_df)
num_samples = 1000  # Adjust the number of samples as needed
new_data = np.random.normal(mean_values, std_values, (num_samples, len(mean_values)))
new_df = pd.DataFrame(new_data, columns=df1.columns)
new_df.head(5)
import scipy.stats as stats

# Overlay each column's fitted normal PDF on a histogram of the generated
# synthetic data to eyeball the fit quality.
fig, axes = plt.subplots(nrows=int(np.ceil(len(df1.columns) / 2)), ncols=2, figsize=(10, 10))
panels = axes.flatten()
for i, column in enumerate(df1.columns):
    data = df1[column]
    mu, sigma = stats.norm.fit(data)
    grid = np.linspace(min(data) - 3 * sigma, max(data) + 3 * sigma, 100)
    pdf = stats.norm.pdf(grid, mu, sigma)
    panels[i].plot(grid, pdf, label='Original PDF')
    panels[i].hist(new_df[column], bins=30, density=True, alpha=0.5, label='Generated Data')
    panels[i].set_title(column)
    panels[i].set_xlabel('Values')
    panels[i].set_ylabel('Probability Density')
    panels[i].legend()
plt.tight_layout()
plt.show()
# Same misclassification count on the synthetic Irish samples, followed by a
# binomial test of H0: error rate = 0.5%.
t_NSF = t_WQM = t_CCME = t_WA = t_DWA = 0
for i in range(len(new_df)):
    x = new_df.iloc[i]
    if xato_bor(x):
        if NSF(x)[1] == 'Good': t_NSF += 1
        if WQM(x)[1] == 'Good': t_WQM += 1
        if CCME(x)[1] == 'Good': t_CCME += 1
        if WA(x)[1] == 'Good': t_WA += 1
        if DWA(x)[1] == 'Good': t_DWA += 1
    else:
        if NSF(x)[1] == 'Poor': t_NSF += 1
        if WQM(x)[1] == 'Poor': t_WQM += 1
        if CCME(x)[1] == 'Poor': t_CCME += 1
        if WA(x)[1] == 'Poor': t_WA += 1
        if DWA(x)[1] == 'Poor': t_DWA += 1
t = {'NSF': t_NSF, 'WQM': t_WQM, 'CCME': t_CCME, 'WA': t_WA, 'DWA': t_DWA}
print(t)
labels = list(t.keys())
values = list(t.values())
plt.figure(figsize=(5, 3))
plt.bar(labels, values, color='blue')
plt.title('Incorrectness of Data')
plt.xticks(rotation=45)
plt.show()

# Two-sided binomial test per model; n is fixed at the 1000 generated rows.
H0 = 0.005  # null hypothesis
alpha = 0.05  # Significance level
print('p-values')
for model_name in t:
    p_values = binomtest(t[model_name], 1000, p=H0).pvalue
    if p_values < alpha:
        text = 'H0 qabul qilinmasligiga statistik asos bor'
    else:
        text = 'H0 qabul qilinishiga statistik asos bor'
    print(model_name, '----', f"{p_values:.3f}", '----', text)
# Category percentages for the four shared models on the Irish scored frame.
df = df[['NSF', 'CCME', 'WA', 'DWA']]
categories = ['Good', 'Fair', 'Marginal', 'Poor']
freq_dict = {}
for col in df.columns:
    value_counts = df[col].value_counts().to_dict()
    freq_dict[col] = {category: value_counts.get(category, 0) for category in categories}
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(8, 6))
if len(freq_dict) > 1:
    axes = axes.flatten()
for i, (model, counts) in enumerate(freq_dict.items()):
    total = sum(counts.values())
    percentages = [(counts[cat] / total) * 100 for cat in categories]
    axes[i].bar(categories, percentages, color=['blue', 'orange', 'green', 'red'])
    axes[i].set_title(f'{model} Distribution (%)')
plt.tight_layout()
plt.show()
